### Import data ----
# `path` is defined in the document setup chunk (see the setup code).
df_raw <- read.csv(file = file.path(path, 'data', 'raw_data', 'outcome.csv'))

#### Clean data
# preprocess.R defines preprocess_data()
source("preprocess.R")
df <- preprocess_data(df_raw)

# Instrument to analyse in this document.
# Options are: MADRS.S, LSAS.SR, PDSS.SR
items_to_use <- 'LSAS.SR'
# Keep only the item labels belonging to the chosen instrument
itemlabels <- itemlabels %>%
  as_tibble() %>%
  filter(str_detect(itemnr, items_to_use))

# DIF + auxiliary variables to use.
# DIF not used: 'marital_status', 'children', 'working'
dif_variables <- c('Patient', 'Treatment', 'sex', 'age',
                   'TreatmentAccessStart', 'education')

### Make a backup of the dataframe, in case you need to revert changes at some point
df.all <- df
print(itemlabels)
# A tibble: 48 × 2
itemnr item
<chr> <chr>
1 LSAS.SR_1a phone anxiety
2 LSAS.SR_1b phone avoid
3 LSAS.SR_2a small group anxiety
4 LSAS.SR_2b small group avoid
5 LSAS.SR_3a eat public anxiety
6 LSAS.SR_3b eat public avoid
7 LSAS.SR_4a drink public anxiety
8 LSAS.SR_4b drink public avoid
9 LSAS.SR_5a talking authority anxiety
10 LSAS.SR_5b talking authority avoid
# ℹ 38 more rows
As we see here, LSAS.SR is not administered in all conditions over the course of treatment. During screening, however, it is administered in all conditions. We start by showing all timepoints, to draw targeting plots.
3.1 Remove missing
Code
##### Before filtering out participants, check the missing data structure
##### using RImissing() and RImissingP().
# Minimum number of item responses a participant should have to be included
# in the analysis.
# In our case, if a participant has missing on one item, all items for that
# timepoint are missing.
min.responses <- 4
# Keep an unfiltered copy so this step can be reverted
df_save <- df
# Select the variables we will work with, and filter out respondents with
# missing data.
# NOTE(review): the filter keeps rows whose count of *missing* item responses
# is below min.responses — not a count of present responses. This is only
# equivalent here because missingness is all-or-nothing per timepoint; confirm
# if reusing with partially missing data.
df <- df %>%
  select(all_of(c(dif_variables, "Time", itemlabels$itemnr))) %>%
  filter(rowSums(is.na(select(., all_of(itemlabels$itemnr)))) < min.responses)
4 Create DIF variables
Code
#---- Create DIF variables ----
# DIF variables are recoded as factors (or the appropriate numeric type),
# since the DIF functions require this. They could also be stored in their
# own data.frame (not a tibble) instead of as vectors.
# Named vector mapping column name -> target type.
type_transform <- c(
  Treatment = "factor", sex = "factor", age = "numeric",
  TreatmentAccessStart = "integer", education = "factor", Time = 'factor'
)
# Transform columns based on the named vector.
# `type_transform[[cur_column()]]` looks the target type up directly by name,
# replacing the original convoluted
# `type_transform[which(names(type_transform) == cur_column())]`.
dif <- df %>%
  mutate(across(names(type_transform), ~ switch(
    type_transform[[cur_column()]],
    "integer"   = as.integer(.),
    "character" = as.character(.),
    "factor"    = as.factor(.),
    "numeric"   = as.numeric(.)
  ))) %>%
  as.data.frame(.) %>%
  select(!all_of(itemlabels$itemnr))
# Then remove the DIF variables from the dataframe, since the Rasch analyses
# need a dataframe with only item data (+ Time).
df <- df %>% select(all_of(c("Time", itemlabels$itemnr)))
# Item-only data without the Time column
dfnotime <- df %>% select(!Time)
source("RISE_theme.R")
5 Demographics descriptives
Code
# Demographics at screening only (everyone is measured at Time == 'SCREEN').
# Drop the identifier and timepoint columns before summarising.
dif_spec <- dif %>%
  filter(Time == 'SCREEN') %>%
  select(!all_of(c("Time", "Patient")))
summary(dif_spec)
# RIdemographics(dif_spec,'Treatment') <- function crashes TODO make own
Treatment sex age TreatmentAccessStart education
MDD:2708 F:3700 Min. :18.0 Min. :2008 Primary : 460
PD :1507 M:2270 1st Qu.:26.0 1st Qu.:2012 Secondary :2905
SAD:1755 Median :33.0 Median :2014 Postsecondary:2605
Mean :35.2 Mean :2014
3rd Qu.:42.0 3rd Qu.:2017
Max. :84.0 Max. :2019
6 Overall number of responses
Code
# Collapsed over all timepoints
RIallresp(dfnotime)

# Separate: one response-count table per timepoint.
# Split the data by Time, then tabulate responses for each subset
# (the Time column itself is dropped before tabulation).
# NOTE(review): in the rendered echo the split step appeared commented out,
# but the per-timepoint output (SCREEN./PRE./WEEK… columns) requires it,
# so it is made explicit here.
RIallresp_over_times <- df %>%
  split(.$Time) %>%
  map(~ RIallresp(.x %>% dplyr::select(!Time)))
# TODO: make nice later
RI_allresp_kable_grid <- combine_kables_grid(RIallresp_over_times, cols = 3)
RI_allresp_kable_grid
Response category
Number of responses
Percent
0
319302
29.8
1
353013
32.9
2
229850
21.4
3
170299
15.9
x
SCREEN.Response category
SCREEN.Number of responses
SCREEN.Percent
PRE.Response category
PRE.Number of responses
PRE.Percent
WEEK01.Response category
WEEK01.Number of responses
WEEK01.Percent
0
107419
37.5
0
17550
22.7
0
16590
23.5
1
88404
30.9
1
21933
28.3
1
21555
30.5
2
52737
18.4
2
19674
25.4
2
17594
24.9
3
38000
13.3
3
18219
23.5
3
14869
21.1
WEEK02.Response category
WEEK02.Number of responses
WEEK02.Percent
WEEK03.Response category
WEEK03.Number of responses
WEEK03.Percent
WEEK04.Response category
WEEK04.Number of responses
WEEK04.Percent
0
17496
23.7
0
18117
24.9
0
18130
25.5
1
23433
31.8
1
23689
32.6
1
23868
33.6
2
18028
24.5
2
17184
23.6
2
16563
23.3
3
14723
20.0
3
13682
18.8
3
12527
17.6
WEEK05.Response category
WEEK05.Number of responses
WEEK05.Percent
WEEK06.Response category
WEEK06.Number of responses
WEEK06.Percent
WEEK07.Response category
WEEK07.Number of responses
WEEK07.Percent
0
17827
26.9
0
17209
27.2
0
17083
28.2
1
22505
33.9
1
21904
34.6
1
21632
35.7
2
14965
22.6
2
14034
22.2
2
13193
21.7
3
11039
16.6
3
10069
15.9
3
8764
14.4
WEEK08.Response category
WEEK08.Number of responses
WEEK08.Percent
WEEK09.Response category
WEEK09.Number of responses
WEEK09.Percent
WEEK10.Response category
WEEK10.Number of responses
WEEK10.Percent
0
17056
29.3
0
16990
30.6
0
16481
31.5
1
20934
35.9
1
20218
36.5
1
19332
37.0
2
12323
21.1
2
11288
20.4
2
10333
19.8
3
7959
13.7
3
6944
12.5
3
6126
11.7
POST.Response category
POST.Number of responses
POST.Percent
" "
" "
0
21354
33.2
1
23606
36.7
2
11934
18.6
3
7378
11.5
7 Descriptives of raw data
Paste descriptive part here after Rasch iterations.
8 Timepoint decision
Due to the amount of data across timepoints targeting will first be inspected, and the best fitting timepoint will be chosen based on targeting, which in turn will inform the rest of the analyses.
8.1 Targeting
Code
# Increase fig-height above as needed if you have many items (original value was 5).
# RItargeting(dfnotime)
require(patchwork)
# Draw one targeting plot per timepoint; returns a list of ggplot objects
targeting_time_plots <- ggplots_with_spec_func_over_time(
  df,
  func_call = RItargeting,
  title_all = '',
  blankx = FALSE,
  colsplot = 3,
  rowsplot = 5,
  return_list_plots = TRUE
)
# Arrange the per-timepoint plots in a 3-column grid with a shared legend
good_layout_targeting_plots <- wrap_plots(targeting_time_plots, ncol = 3) +
  plot_layout(guides = "collect")
# print(good_layout_targeting_plots)
ggsave(
  filename = paste0("./plots/", items_to_use, "_targeting_over_time.png"),
  plot = good_layout_targeting_plots,
  width = 20,   # increase width (in inches)
  height = 60,  # increase height (in inches)
  dpi = 300,    # high resolution
  limitsize = FALSE
)
print(good_layout_targeting_plots)
PRE has very good targeting and looks promising. We also note that this timepoint includes only patients in treatment for social anxiety disorder (no other treatment conditions are mixed in at PRE). We double-check how much data we have for each threshold (recommended: 20+).
We see that the EFA essentially captures the item pairs of each question. Even constraining the number of factors to 2 does not confirm that fear and avoidance load on different factors. The EFA is thus not very informative, except that the items are not unidimensional. We move on with the 19 items and, with the a/b distinction in mind — i.e., that the item pairs have high residual correlations — aim to remove at least one of each item pair. Regarding the Mokken part, do note that talking to new people and being with strangers have the highest H.
MSQ values based on conditional estimation (n = 1612 complete cases).
Code
# Simulation-based item-fit thresholds (300 simulated datasets, parallelised
# over n_cores CPUs).
# NOTE(review): RIgetfit was reported to error when estimating the simfit for
# LSAS; the same error occurred for the WEEK01 and WEEK02 subsets — TODO
# investigate.
# df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time)
simfit1 <- RIgetfit(df_ls, iterations = 300, cpu = n_cores)
# RIitemfit(df_ls, simfit1)
Item
InfitMSQ
Infit thresholds
OutfitMSQ
Outfit thresholds
Infit diff
Outfit diff
Location
LSAS.SR_2a
1.077
[0.934, 1.086]
1.068
[0.913, 1.126]
no misfit
no misfit
-0.64
LSAS.SR_2b
0.992
[0.93, 1.085]
0.995
[0.896, 1.1]
no misfit
no misfit
0.15
LSAS.SR_5b
1.117
[0.93, 1.068]
1.144
[0.919, 1.08]
0.049
0.064
0.08
LSAS.SR_8a
1.143
[0.925, 1.069]
1.17
[0.923, 1.07]
0.074
0.1
0.17
LSAS.SR_8b
1.166
[0.932, 1.075]
1.189
[0.921, 1.088]
0.091
0.101
0.74
LSAS.SR_10a
0.941
[0.934, 1.079]
0.934
[0.927, 1.072]
no misfit
no misfit
-0.08
LSAS.SR_10b
0.952
[0.935, 1.068]
0.956
[0.923, 1.077]
no misfit
no misfit
0.32
LSAS.SR_11a
0.693
[0.913, 1.091]
0.688
[0.896, 1.132]
0.22
0.208
-0.24
LSAS.SR_11b
0.748
[0.929, 1.075]
0.739
[0.906, 1.152]
0.181
0.167
0.04
LSAS.SR_12a
0.774
[0.934, 1.067]
0.758
[0.913, 1.076]
0.16
0.155
-0.24
LSAS.SR_12b
0.798
[0.915, 1.064]
0.789
[0.909, 1.091]
0.117
0.12
0.08
LSAS.SR_14a
1.039
[0.926, 1.088]
1.024
[0.913, 1.106]
no misfit
no misfit
-0.23
LSAS.SR_14b
1.026
[0.924, 1.081]
1.002
[0.91, 1.094]
no misfit
no misfit
0.64
LSAS.SR_16a
1.259
[0.928, 1.085]
1.23
[0.91, 1.094]
0.174
0.136
-1.87
LSAS.SR_18a
0.969
[0.942, 1.081]
0.965
[0.937, 1.1]
no misfit
no misfit
-0.67
LSAS.SR_18b
1.042
[0.925, 1.069]
1.038
[0.916, 1.101]
no misfit
no misfit
-0.58
LSAS.SR_19a
1.132
[0.933, 1.072]
1.142
[0.922, 1.086]
0.06
0.056
0.58
LSAS.SR_22a
1.134
[0.924, 1.077]
1.153
[0.918, 1.097]
0.057
0.056
0.66
LSAS.SR_24a
1.159
[0.931, 1.082]
1.162
[0.913, 1.113]
0.077
0.049
1.07
Note:
MSQ values based on conditional calculations (n = 1612 complete cases). Simulation based thresholds from 300 simulated datasets.
Item Var gamma se pvalue padj.BH sig lower upper
1 LSAS.SR_2a grouping_based_on_score NaN NaN NA NA ? NA NA
2 LSAS.SR_2b grouping_based_on_score NaN NaN NA NA ? NA NA
3 LSAS.SR_5b grouping_based_on_score NaN NaN NA NA ? NA NA
4 LSAS.SR_8a grouping_based_on_score NaN NaN NA NA ? NA NA
5 LSAS.SR_8b grouping_based_on_score NaN NaN NA NA ? NA NA
6 LSAS.SR_10a grouping_based_on_score NaN NaN NA NA ? NA NA
7 LSAS.SR_10b grouping_based_on_score NaN NaN NA NA ? NA NA
8 LSAS.SR_11a grouping_based_on_score NaN NaN NA NA ? NA NA
9 LSAS.SR_11b grouping_based_on_score NaN NaN NA NA ? NA NA
10 LSAS.SR_12a grouping_based_on_score NaN NaN NA NA ? NA NA
11 LSAS.SR_12b grouping_based_on_score NaN NaN NA NA ? NA NA
12 LSAS.SR_14a grouping_based_on_score NaN NaN NA NA ? NA NA
13 LSAS.SR_14b grouping_based_on_score NaN NaN NA NA ? NA NA
14 LSAS.SR_16a grouping_based_on_score NaN NaN NA NA ? NA NA
15 LSAS.SR_18a grouping_based_on_score NaN NaN NA NA ? NA NA
16 LSAS.SR_18b grouping_based_on_score NaN NaN NA NA ? NA NA
17 LSAS.SR_19a grouping_based_on_score NaN NaN NA NA ? NA NA
18 LSAS.SR_22a grouping_based_on_score NaN NaN NA NA ? NA NA
19 LSAS.SR_24a grouping_based_on_score NaN NaN NA NA ? NA NA
We see here that the biggest problem is the residual correlations arising from each posed situation having a fear component (a) and an avoidance component (b), which are highly correlated. Yet from the Mokken analysis they do seem to load on the same dimension (partly confirmed by the EFA) — and while the PCA and the component plot in the Rasch analyses are more nuanced, they are not decisive. The targeting across a/b is mostly equivalent.
Marginal differences here and small overfit. 2b also had smaller residual correlations with other items (including the a items below, which will be kept). Keeping 2b.
Of 8 a/b a had a more even distribution
residual cor = 0.66
b underfit = 82%
Of 10 a/b a had a more even distribution
residual cor = 0.68
b overfit = 14%
Of 11 a/b b had a more even distribution
residual cor = 0.28
a overfit = 100%
Of 12 a/b, b had a more even distribution
residual cor = 0.54
a overfit = 100%
b overfit = 99.8% However the residual correlation between 12 b and 11b was high .44 . And correlation between 11b and 12a was lower (.28) keeping 12a for now.
Of 14 a/b a had a more even distribution
residual cor = 0.57
a and b no misfit
Of 18 a/b, b had a more even distribution
residual cor = 0.65
a overfit = 14%
Thus in the first round we remove 2a, 8b, 10b, 11a, 12a, 14b, 18a.
MSQ values based on conditional estimation (n = 1612 complete cases).
Code
# Simulation-based item-fit thresholds (300 simulated datasets, parallelised
# over n_cores CPUs).
# NOTE(review): RIgetfit was reported to error when estimating the simfit for
# LSAS; the same error occurred for the WEEK01 and WEEK02 subsets — TODO
# investigate.
# df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time)
simfit1 <- RIgetfit(df_ls, iterations = 300, cpu = n_cores)
# RIitemfit(df_ls, simfit1)
Item
InfitMSQ
Infit thresholds
OutfitMSQ
Outfit thresholds
Infit diff
Outfit diff
Location
LSAS.SR_2b
1
[0.924, 1.08]
1.004
[0.905, 1.105]
no misfit
no misfit
0.16
LSAS.SR_5b
1.053
[0.925, 1.057]
1.057
[0.922, 1.068]
no misfit
no misfit
0.09
LSAS.SR_8a
1.125
[0.913, 1.087]
1.149
[0.916, 1.089]
0.038
0.06
0.18
LSAS.SR_10a
0.948
[0.928, 1.074]
0.953
[0.925, 1.087]
no misfit
no misfit
-0.06
LSAS.SR_11b
0.784
[0.921, 1.078]
0.778
[0.876, 1.154]
0.137
0.098
0.06
LSAS.SR_12a
0.793
[0.941, 1.086]
0.782
[0.928, 1.109]
0.148
0.146
-0.22
LSAS.SR_14a
1.032
[0.926, 1.085]
1.023
[0.914, 1.091]
no misfit
no misfit
-0.20
LSAS.SR_16a
1.186
[0.918, 1.099]
1.156
[0.917, 1.09]
0.087
0.066
-1.74
LSAS.SR_18b
1.047
[0.911, 1.092]
1.056
[0.9, 1.113]
no misfit
no misfit
-0.53
LSAS.SR_19a
1.041
[0.934, 1.091]
1.038
[0.925, 1.089]
no misfit
no misfit
0.57
LSAS.SR_22a
1.046
[0.935, 1.078]
1.033
[0.921, 1.089]
no misfit
no misfit
0.65
LSAS.SR_24a
1.046
[0.92, 1.066]
1.013
[0.914, 1.082]
no misfit
no misfit
1.04
Note:
MSQ values based on conditional calculations (n = 1612 complete cases). Simulation based thresholds from 300 simulated datasets.
Item Var gamma se pvalue padj.BH sig lower upper
1 LSAS.SR_2b grouping_based_on_score NaN NaN NA NA ? NA NA
2 LSAS.SR_5b grouping_based_on_score NaN NaN NA NA ? NA NA
3 LSAS.SR_8a grouping_based_on_score NaN NaN NA NA ? NA NA
4 LSAS.SR_10a grouping_based_on_score NaN NaN NA NA ? NA NA
5 LSAS.SR_11b grouping_based_on_score NaN NaN NA NA ? NA NA
6 LSAS.SR_12a grouping_based_on_score NaN NaN NA NA ? NA NA
7 LSAS.SR_14a grouping_based_on_score NaN NaN NA NA ? NA NA
8 LSAS.SR_16a grouping_based_on_score NaN NaN NA NA ? NA NA
9 LSAS.SR_18b grouping_based_on_score NaN NaN NA NA ? NA NA
10 LSAS.SR_19a grouping_based_on_score NaN NaN NA NA ? NA NA
11 LSAS.SR_22a grouping_based_on_score NaN NaN NA NA ? NA NA
12 LSAS.SR_24a grouping_based_on_score NaN NaN NA NA ? NA NA
The most misfit is shown by 11b and 12a; both are cardinal symptoms, and both are equally overfit (100%). 11b has slightly more residual correlations (with 12a not included), more DIF difference in age, and slightly more overfit. Removing 11b.
MSQ values based on conditional estimation (n = 1612 complete cases).
Code
# Simulation-based item-fit thresholds (300 simulated datasets, parallelised
# over n_cores CPUs).
# NOTE(review): RIgetfit was reported to error when estimating the simfit for
# LSAS; the same error occurred for the WEEK01 and WEEK02 subsets — TODO
# investigate.
# df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time)
simfit1 <- RIgetfit(df_ls, iterations = 300, cpu = n_cores)
# RIitemfit(df_ls, simfit1)
Item
InfitMSQ
Infit thresholds
OutfitMSQ
Outfit thresholds
Infit diff
Outfit diff
Location
LSAS.SR_2b
0.996
[0.943, 1.073]
0.997
[0.935, 1.089]
no misfit
no misfit
0.16
LSAS.SR_5b
1.04
[0.929, 1.071]
1.043
[0.924, 1.081]
no misfit
no misfit
0.09
LSAS.SR_8a
1.062
[0.913, 1.093]
1.083
[0.908, 1.097]
no misfit
no misfit
0.18
LSAS.SR_10a
0.962
[0.93, 1.091]
0.962
[0.884, 1.122]
no misfit
no misfit
-0.05
LSAS.SR_12a
0.829
[0.928, 1.082]
0.819
[0.907, 1.097]
0.099
0.088
-0.20
LSAS.SR_14a
0.988
[0.936, 1.06]
0.979
[0.927, 1.075]
no misfit
no misfit
-0.19
LSAS.SR_16a
1.129
[0.93, 1.09]
1.096
[0.929, 1.123]
0.039
no misfit
-1.69
LSAS.SR_18b
1.04
[0.928, 1.094]
1.052
[0.921, 1.107]
no misfit
no misfit
-0.52
LSAS.SR_19a
1.017
[0.918, 1.086]
1.012
[0.908, 1.081]
no misfit
no misfit
0.56
LSAS.SR_22a
1.005
[0.933, 1.08]
0.986
[0.929, 1.088]
no misfit
no misfit
0.64
LSAS.SR_24a
1.009
[0.926, 1.085]
0.975
[0.925, 1.092]
no misfit
no misfit
1.02
Note:
MSQ values based on conditional calculations (n = 1612 complete cases). Simulation based thresholds from 300 simulated datasets.
Item Var gamma se pvalue padj.BH sig lower upper
1 LSAS.SR_2b grouping_based_on_score NaN NaN NA NA ? NA NA
2 LSAS.SR_5b grouping_based_on_score NaN NaN NA NA ? NA NA
3 LSAS.SR_8a grouping_based_on_score NaN NaN NA NA ? NA NA
4 LSAS.SR_10a grouping_based_on_score NaN NaN NA NA ? NA NA
5 LSAS.SR_12a grouping_based_on_score NaN NaN NA NA ? NA NA
6 LSAS.SR_14a grouping_based_on_score NaN NaN NA NA ? NA NA
7 LSAS.SR_16a grouping_based_on_score NaN NaN NA NA ? NA NA
8 LSAS.SR_18b grouping_based_on_score NaN NaN NA NA ? NA NA
9 LSAS.SR_19a grouping_based_on_score NaN NaN NA NA ? NA NA
10 LSAS.SR_22a grouping_based_on_score NaN NaN NA NA ? NA NA
11 LSAS.SR_24a grouping_based_on_score NaN NaN NA NA ? NA NA
MSQ values based on conditional estimation (n = 1612 complete cases).
Code
# Simulation-based item-fit thresholds (300 simulated datasets, parallelised
# over n_cores CPUs).
# NOTE(review): RIgetfit was reported to error when estimating the simfit for
# LSAS; the same error occurred for the WEEK01 and WEEK02 subsets — TODO
# investigate.
# df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time)
simfit1 <- RIgetfit(df_ls, iterations = 300, cpu = n_cores)
# RIitemfit(df_ls, simfit1)
Item
InfitMSQ
Infit thresholds
OutfitMSQ
Outfit thresholds
Infit diff
Outfit diff
Location
LSAS.SR_2b
0.987
[0.929, 1.078]
0.986
[0.916, 1.091]
no misfit
no misfit
0.14
LSAS.SR_5b
1.003
[0.935, 1.064]
1.001
[0.919, 1.065]
no misfit
no misfit
0.07
LSAS.SR_8a
1.02
[0.917, 1.064]
1.042
[0.878, 1.133]
no misfit
no misfit
0.16
LSAS.SR_10a
0.986
[0.905, 1.091]
0.985
[0.892, 1.115]
no misfit
no misfit
-0.07
LSAS.SR_14a
0.962
[0.933, 1.088]
0.953
[0.92, 1.113]
no misfit
no misfit
-0.21
LSAS.SR_16a
1.086
[0.929, 1.084]
1.056
[0.916, 1.08]
0.002
no misfit
-1.67
LSAS.SR_18b
1.023
[0.926, 1.081]
1.037
[0.908, 1.11]
no misfit
no misfit
-0.52
LSAS.SR_19a
1.021
[0.909, 1.057]
1.015
[0.902, 1.057]
no misfit
no misfit
0.53
LSAS.SR_22a
0.984
[0.913, 1.067]
0.968
[0.903, 1.077]
no misfit
no misfit
0.61
LSAS.SR_24a
0.981
[0.916, 1.092]
0.949
[0.91, 1.099]
no misfit
no misfit
0.98
Note:
MSQ values based on conditional calculations (n = 1612 complete cases). Simulation based thresholds from 300 simulated datasets.
Item Var gamma se pvalue padj.BH sig lower upper
1 LSAS.SR_2b grouping_based_on_score NaN NaN NA NA ? NA NA
2 LSAS.SR_5b grouping_based_on_score NaN NaN NA NA ? NA NA
3 LSAS.SR_8a grouping_based_on_score NaN NaN NA NA ? NA NA
4 LSAS.SR_10a grouping_based_on_score NaN NaN NA NA ? NA NA
5 LSAS.SR_14a grouping_based_on_score NaN NaN NA NA ? NA NA
6 LSAS.SR_16a grouping_based_on_score NaN NaN NA NA ? NA NA
7 LSAS.SR_18b grouping_based_on_score NaN NaN NA NA ? NA NA
8 LSAS.SR_19a grouping_based_on_score NaN NaN NA NA ? NA NA
9 LSAS.SR_22a grouping_based_on_score NaN NaN NA NA ? NA NA
10 LSAS.SR_24a grouping_based_on_score NaN NaN NA NA ? NA NA
Item 16a does seem to have problems across several dimensions, except the bootstrapped item fit: residual correlations, partial gamma, and DIF on age. We try removing it.
MSQ values based on conditional estimation (n = 1612 complete cases).
Code
# Simulation-based item-fit thresholds (300 simulated datasets, parallelised
# over n_cores CPUs).
# NOTE(review): RIgetfit was reported to error when estimating the simfit for
# LSAS; the same error occurred for the WEEK01 and WEEK02 subsets — TODO
# investigate.
# df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time)
simfit1 <- RIgetfit(df_ls, iterations = 300, cpu = n_cores)
# RIitemfit(df_ls, simfit1)
Item
InfitMSQ
Infit thresholds
OutfitMSQ
Outfit thresholds
Infit diff
Outfit diff
Location
LSAS.SR_2b
1.036
[0.919, 1.074]
1.042
[0.912, 1.091]
no misfit
no misfit
-0.05
LSAS.SR_5b
1.011
[0.933, 1.075]
1.014
[0.929, 1.081]
no misfit
no misfit
-0.12
LSAS.SR_8a
1.031
[0.935, 1.084]
1.055
[0.925, 1.111]
no misfit
no misfit
-0.03
LSAS.SR_10a
0.966
[0.923, 1.081]
0.966
[0.911, 1.098]
no misfit
no misfit
-0.26
LSAS.SR_14a
1
[0.917, 1.082]
0.996
[0.908, 1.096]
no misfit
no misfit
-0.40
LSAS.SR_18b
1.035
[0.927, 1.087]
1.047
[0.898, 1.111]
no misfit
no misfit
-0.72
LSAS.SR_19a
1.02
[0.934, 1.062]
1.015
[0.933, 1.068]
no misfit
no misfit
0.35
LSAS.SR_22a
0.958
[0.94, 1.062]
0.939
[0.936, 1.067]
no misfit
no misfit
0.43
LSAS.SR_24a
0.954
[0.944, 1.078]
0.916
[0.938, 1.092]
no misfit
0.022
0.80
Note:
MSQ values based on conditional calculations (n = 1612 complete cases). Simulation based thresholds from 300 simulated datasets.
Item Var gamma se pvalue padj.BH sig lower upper
1 LSAS.SR_2b grouping_based_on_score NaN NaN NA NA ? NA NA
2 LSAS.SR_5b grouping_based_on_score NaN NaN NA NA ? NA NA
3 LSAS.SR_8a grouping_based_on_score NaN NaN NA NA ? NA NA
4 LSAS.SR_10a grouping_based_on_score NaN NaN NA NA ? NA NA
5 LSAS.SR_14a grouping_based_on_score NaN NaN NA NA ? NA NA
6 LSAS.SR_18b grouping_based_on_score NaN NaN NA NA ? NA NA
7 LSAS.SR_19a grouping_based_on_score NaN NaN NA NA ? NA NA
8 LSAS.SR_22a grouping_based_on_score NaN NaN NA NA ? NA NA
9 LSAS.SR_24a grouping_based_on_score NaN NaN NA NA ? NA NA
24a (“Resisting sales”) does show some misfit, residual correlations, and overfitting in the bootstrap. It is not much, and the item does have better targeting than 22a. Removing 22a.
MSQ values based on conditional estimation (n = 1612 complete cases).
Code
# Simulation-based item-fit thresholds (300 simulated datasets, parallelised
# over n_cores CPUs).
# NOTE(review): RIgetfit was reported to error when estimating the simfit for
# LSAS; the same error occurred for the WEEK01 and WEEK02 subsets — TODO
# investigate.
# df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time)
simfit1 <- RIgetfit(df_ls, iterations = 300, cpu = n_cores)
# RIitemfit(df_ls, simfit1)
Item
InfitMSQ
Infit thresholds
OutfitMSQ
Outfit thresholds
Infit diff
Outfit diff
Location
LSAS.SR_2b
1.003
[0.929, 1.072]
1.008
[0.916, 1.098]
no misfit
no misfit
0.00
LSAS.SR_5b
0.976
[0.917, 1.09]
0.973
[0.912, 1.091]
no misfit
no misfit
-0.07
LSAS.SR_8a
1.031
[0.941, 1.089]
1.061
[0.933, 1.105]
no misfit
no misfit
0.03
LSAS.SR_10a
0.988
[0.922, 1.089]
0.984
[0.919, 1.1]
no misfit
no misfit
-0.20
LSAS.SR_14a
0.988
[0.91, 1.091]
0.984
[0.903, 1.111]
no misfit
no misfit
-0.34
LSAS.SR_18b
1.012
[0.932, 1.105]
1.014
[0.927, 1.096]
no misfit
no misfit
-0.66
LSAS.SR_19a
1.008
[0.918, 1.045]
1.005
[0.91, 1.05]
no misfit
no misfit
0.40
LSAS.SR_24a
1.011
[0.938, 1.081]
0.976
[0.933, 1.088]
no misfit
no misfit
0.85
Note:
MSQ values based on conditional calculations (n = 1612 complete cases). Simulation based thresholds from 300 simulated datasets.
Item Var gamma se pvalue padj.BH sig lower upper
1 LSAS.SR_2b grouping_based_on_score NaN NaN NA NA ? NA NA
2 LSAS.SR_5b grouping_based_on_score NaN NaN NA NA ? NA NA
3 LSAS.SR_8a grouping_based_on_score NaN NaN NA NA ? NA NA
4 LSAS.SR_10a grouping_based_on_score NaN NaN NA NA ? NA NA
5 LSAS.SR_14a grouping_based_on_score NaN NaN NA NA ? NA NA
6 LSAS.SR_18b grouping_based_on_score NaN NaN NA NA ? NA NA
7 LSAS.SR_19a grouping_based_on_score NaN NaN NA NA ? NA NA
8 LSAS.SR_24a grouping_based_on_score NaN NaN NA NA ? NA NA
# Person fit: U3 statistic for polytomous items under the Partial Credit Model
pfit_u3poly <- PerFit::U3poly(
  matrix = df_ls,
  Ncat = 4, # make sure to input number of response categories, not thresholds
  IRT.PModel = "PCM"
)
# IDs of respondents flagged as misfitting by the U3 statistic
misfits <- PerFit::flagged.resp(pfit_u3poly) %>%
  pluck("Scores") %>%
  as.data.frame() %>%
  pull(FlaggedID)
# Row ids of misfitting respondents according to RIpfit, for comparison
misfits2 <- RIpfit(df_ls, output = "rowid")
---title: "Rasch analysis of outcomes"subtitle: "LSAS-SR"title-block-banner: "#4F0433" title-block-banner-color: "#FFFFFF"author: - name: Nils Hentati Isacsson affiliation: Centre for Psychiatry Research, Department of Clinical Neuroscience, Karolinska Institutet, & Stockholm Health Care Services, Region Stockholm, Sweden affiliation-url: orcid: 0000-0002-5749-5310 - name: 'Magnus Johansson' affiliation: 'RISE Research Institutes of Sweden' affiliation-url: 'https://ri.se/en/shic' orcid: '0000-0003-1669-592X'date: last-modifieddate-format: isoalways_allow_html: trueformat: html: toc: true toc-depth: 3 toc-title: "Table of contents" embed-resources: true standalone: true page-layout: full mainfont: 'Lato' monofont: 'Roboto Mono' code-overflow: wrap code-fold: true code-tools: true code-link: true number-sections: true fig-dpi: 96 layout-align: left linestretch: 1.6 theme: - materia - custom.scss css: styles.css license: CC BY pdf: papersize: a4 documentclass: report execute: echo: true warning: false message: false cache: falseeditor_options: markdown: wrap: 72 chunk_output_type: console---# Setup ```{r}#| label: setuplibrary(easyRasch) # devtools::install_github("pgmj/easyRasch")#library(RISEkbmRasch)library(grateful)library(ggrepel)library(car)library(kableExtra)library(tidyverse)library(eRm)library(iarm)library(mirt)library(psych)library(psychotree)library(matrixStats)library(reshape)library(knitr)library(patchwork)library(formattable) library(glue)# residual corr error ### optional libraries#library(TAM)#library(skimr)#library(janitor)# 3116259# nohup quarto render LSASSR.qmd --to html &> render_lsas.out & ### some commands exist in multiple packages, here we define preferred ones that are frequently usedselect <- dplyr::selectcount <- dplyr::countrecode <- car::recoderename <- dplyr::renamepath_prim <-'/srv/projects/nils/study4rasch'path2 <-'/Volumes/projects/k8_CPF_Kaldo/Data/Lärande Maskiner/Nils/Study4data'running_machine =Sys.info()[['sysname']]path 
=ifelse(running_machine!='Linux',path2,path_prim)set_mywd_path =ifelse(running_machine!='Linux','./study4rasch/rasch_measures','./rasch_measures')n_cores =ifelse(running_machine!='Linux',1,8)tryCatch({setwd(set_mywd_path)}, error =function(set_mywd_path){print('already in right directory')})source('settings.R') # containing item labels source('mod_easyRasch_func.R') # modified functions + added ```# Importing data```{r, import data}### import datadf_raw <-read.csv(file=file.path(path,'data','raw_data','outcome.csv')) #### Clean data source("preprocess.R")df <-preprocess_data(df_raw)# items to use items_to_use <-'LSAS.SR'# options are #MADRS.S, LSAS.SR, PDSS.SRitemlabels <- itemlabels %>%as_tibble() %>%filter(str_detect(itemnr, items_to_use))# DIF+aux variables to use # DIF not used 'marital_status','children', 'working'dif_variables <-c('Patient','Treatment','sex','age','TreatmentAccessStart','education')### Make a backup of the dataframe, in case you need to revert changes at some pointdf.all <- dfprint(itemlabels)```# Checking missing ```{r,missing numbers}#| label: MissingitemStart <- items_to_useout =RImissing_mod(df,itemStart,facet="Time")print(out)```As we see here LSAS.SR is not used for all conditions over the course of their treatments. However during screening they are. We start by showing all timepoints, to draw targeting plots.## Remove missing ```{r, filter missing}##### Before filtering out participants, you should check the missing data structure using RImissing() and RImissingP()# If you want to include participants with missing data, input the minimum number of items responses that a participant should have to be included in the analysis:min.responses <-4# In our case if they have missing on one item all items for that time are missing. 
df_save <- df# Select the variables we will work with, and filter out respondents with missing datadf <- df %>%select(all_of(c(dif_variables,"Time",itemlabels$itemnr))) %>%# vfilter(rowSums(is.na(select(.,all_of(itemlabels$itemnr)))) < min.responses) ```# Create DIF variables ```{r, dif creation}#---- Create DIF variables----# DIF variables into vectors, recoded as factors since DIF functions need this# these could also be stored in its own dataframe (not a tibble) instead of as vectors# Named vector for the new typestype_transform <-c(Treatment ="factor", sex ="factor", age ="numeric",TreatmentAccessStart ="integer",education="factor",Time='factor')# Transform columns based on the named vectordif <- df %>%mutate(across(names(type_transform), ~switch(type_transform[which(names(type_transform) ==cur_column())], "integer"=as.integer(.), "character"=as.character(.),"factor"=as.factor(.), "numeric"=as.numeric(.)))) %>%as.data.frame(.) %>%select(!all_of(itemlabels$itemnr))# then remove them from dataframe, since we need a dataframe with only item data for the Rasch analysesdf <- df %>%select(all_of(c("Time",itemlabels$itemnr))) # add time here ? 
dfnotime <- df %>%select(!Time)source("RISE_theme.R")```# Demographics descriptives ```{r, demographics}#| layout-ncol: 2dif_spec <- dif %>%filter(Time=='SCREEN') %>%select(!all_of(c("Time","Patient")))summary(dif_spec)# RIdemographics(dif_spec,'Treatment') <- function crashes TODO make own ```# Overall number of responses ```{r, n in each cell}#| layout-ncol: 1#| fig-width: 9# Collapsed RIallresp(dfnotime)# Seperate RIallresp_over_times <- df %>%# split(.$Time) %>%# split the data map(~RIallresp(.x %>% dplyr::select(!Time))) #+ labs(title = .x$Time)) # create separate for each time# make nice later RI_allresp_kable_grid =combine_kables_grid(RIallresp_over_times,cols=3)RI_allresp_kable_grid```# Descriptives of raw dataPaste descriptive part here after rasch itterations # Timepoint decision Due to the amount of data across timepoints targeting will first be inspected, and the best fitting timepoint will be chosen based on targeting, which in turn will inform the rest of the analyses. ## Targeting ```{r, targeting, eval=FALSE}#| fig-height: 60#| fig-width: 20# increase fig-height above as needed, if you have many items og. value was 5. #RItargeting(dfnotime)require(patchwork)targeting_time_plots <-ggplots_with_spec_func_over_time( df,func_call = RItargeting,title_all='',blankx=FALSE,colsplot =3,rowsplot=5,return_list_plots=TRUE)good_layout_targeting_plots <-wrap_plots(targeting_time_plots, ncol =3) +plot_layout(guides="collect")#print(good_layout_targeting_plots)ggsave(filename =paste0("./plots/",items_to_use,"_targeting_over_time.png"),plot = good_layout_targeting_plots,width =20, # Increase width (in inches)height =60, # Increase height (in inches)dpi =300, # High resolutionlimitsize=FALSE)print(good_layout_targeting_plots)```Pre has very good targeting looks promising. We also note that this is only for patient in treatment for social anxiety disorder (no other treatment-conditions are mixed in PRE). We doublecheck how much data we have for each threshold. 
(Recomm. 20+). ## Number of responses per threshhold ```{r, thresh-responses,eval=FALSE}dataframen4 <- df %>%filter(Time=="WEEK02") %>%select(!Time)dfthreshcount =sapply(dataframen4, function(column) table(column))print(dfthreshcount)#dfthreshcount[dfthreshcount<20]```## Timepoint conclusionBased on the above we move forward with the PRE timepoint. ```{r,subset_chosen_timepoint}df_pre <- df %>%filter(Time=='PRE') %>%select(!Time) #%>% sample_n(800)```# Mokken part 1 Its a large scale, with paired items. ::: panel-tabset## Check number of scales```{r, mokken p1}require(mokken)lsasmok <-aisp(df_pre, verbose =TRUE)first_scale =names(lsasmok[lsasmok[,1]==1,])df_s1 <- df_pre %>%select(all_of(first_scale))```We see that while 3 scales are identified the predominant is the one is large. Moving on with this one which also contains the core symptoms. ## Scaleability part 1```{r}scaleH <-coefH(df_s1,results=FALSE)threshold <-0.35H_df <-as.data.frame(scaleH$Hi[,1]) %>%rename('H'='scaleH$Hi[, 1]') %>%mutate(H=as.numeric(H),item =rownames(.))kbl_rise(scaleH$Hi)remove <- H_df[H_df[['H']] < threshold,'item'] keep <- H_df[H_df[['H']] >= threshold, 'item'] df_ls <- df_s1 %>%select(all_of(keep))```We see many items around 0.3, 0.35 clips the number of items in half. 
## Scaleability part 2```{r}scaleH2 <-coefH(df_ls,results=FALSE)kbl_rise(scaleH2$Hi)```## EFA ```{r}require(lavaan)items_from_mokken <-names(df_ls)f1 <-paste0('efa("efa")*f1 =~',paste0(items_from_mokken,collapse=' + '))# 2-factor modelf2 <-paste0('efa("efa")*f1 + efa("efa")*f2 =~',paste0(items_from_mokken,collapse=' + '))# 3-factorf3 <-paste0('efa("efa")*f1 + efa("efa")*f2 + efa("efa")*f3 =~',paste0(items_from_mokken,collapse=' + '))# 4-factorf4 <-paste0('efa("efa")*f1 + efa("efa")*f2 + efa("efa")*f3 + efa("efa")*f4 =~',paste0(items_from_mokken,collapse=' + '))# 5-factorf5 <-paste0('efa("efa")*f1 + efa("efa")*f2 + efa("efa")*f3 + efa("efa")*f4 + efa("efa")*f5 =~',paste0(items_from_mokken,collapse=' + '))efa_f1 <-cfa(model = f1,data = df_ls,rotation ="oblimin",estimator ="WLSMV",ordered =TRUE)efa_f2 <-cfa(model = f2,data = df_ls,rotation ="oblimin",estimator ="WLSMV",ordered =TRUE)efa_f3 <-cfa(model = f3,data = df_ls,rotation ="oblimin",estimator ="WLSMV",ordered =TRUE)efa_f4 <-cfa(model = f4,data = df_ls,rotation ="oblimin",estimator ="WLSMV",ordered =TRUE)efa_f5 <-cfa(model = f5,data = df_ls,rotation ="oblimin",estimator ="WLSMV",ordered =TRUE)```## EFA Fit metrics```{r}fit_metrics_scaled <-c("chisq.scaled", "df", "pvalue.scaled", "cfi.scaled", "tli.scaled", "rmsea.scaled", "rmsea.ci.lower.scaled","rmsea.ci.upper.scaled","srmr")rbind(fitmeasures(efa_f1, fit_metrics_scaled),fitmeasures(efa_f2, fit_metrics_scaled),fitmeasures(efa_f3, fit_metrics_scaled),fitmeasures(efa_f4, fit_metrics_scaled),fitmeasures(efa_f5, fit_metrics_scaled) ) %>%as.data.frame() %>%mutate(across(where(is.numeric),~round(.x, 3))) %>%rename(Chi2.scaled = chisq.scaled,p.scaled = pvalue.scaled,CFI.scaled = cfi.scaled,TLI.scaled = tli.scaled,RMSEA.scaled = rmsea.scaled,CI_low.scaled = rmsea.ci.lower.scaled,CI_high.scaled = rmsea.ci.upper.scaled,SRMR = srmr) %>%add_column(Model =paste0(1:5,"-factor"), .before ="Chi2.scaled") %>% knitr::kable()```## Factor loadings 1 
factor

```{r}
# Standardized loadings for the 1-factor solution.
standardizedsolution(efa_f1) %>% 
  filter(op == "=~") %>% 
  mutate(item = str_remove(rhs, "LSAS.SR_"),
         factor = str_remove(lhs, "f"))
# Large modification indices point to strong residual correlations
# between the a/b (fear/avoidance) item pairs.
modificationIndices(efa_f1, standardized = TRUE) %>% 
  as.data.frame(row.names = NULL) %>% 
  filter(mi > 50) %>% 
  arrange(desc(mi)) %>% 
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>% 
  knitr::kable()
```

Residual correlations between item pairs are strong. 

## Factor loadings 2 factor 

```{r}
#item = str_replace_all(str_remove(rhs, "LSAS.SR_"),c("a" = "1", "b" = "2")) %>% as.double()
# Shared plotting helper for the factor-loading dot plots: this code was
# duplicated verbatim for each model; only the fit object and title differed.
plot_loadings <- function(efa_fit, plot_title) {
  standardizedsolution(efa_fit) %>% 
    filter(op == "=~") %>% 
    mutate(item = str_remove(rhs, "LSAS.SR_"),
           factor = str_remove(lhs, "f")) %>% 
    ggplot(aes(x = est.std, xmin = ci.lower, xmax = ci.upper, y = item)) +
    annotate(geom = "rect", xmin = -1, xmax = 1,
             ymin = -Inf, ymax = Inf, fill = "grey90") +
    annotate(geom = "rect", xmin = -0.7, xmax = 0.7,
             ymin = -Inf, ymax = Inf, fill = "grey93") +
    annotate(geom = "rect", xmin = -0.3, xmax = 0.3,
             ymin = -Inf, ymax = Inf, fill = "grey96") +
    geom_vline(xintercept = 0, color = "white") +
    geom_pointrange(aes(alpha = abs(est.std) < 0.3), fatten = 10) +
    geom_text(aes(label = item, color = abs(est.std) < 0.3), size = 4) +
    scale_color_manual(values = c("white", "transparent")) +
    scale_alpha_manual(values = c(1, 1/3)) +
    scale_x_continuous(expand = c(0, 0), limits = c(-1, 1),
                       breaks = c(-1, -0.7, -0.3, 0, 0.3, 0.7, 1),
                       labels = c("-1", "-.7", "-.3", "0", ".3", ".7", "1")) +
    ggtitle(plot_title) +
    theme(legend.position = "none") +
    facet_wrap(~ factor, labeller = label_both)
}
# Fix: this plot's title previously read "3-factor model" although it
# shows the 2-factor solution.
plot_loadings(efa_f2, "Factor loadings for the 2-factor model")
```

## Factor loadings 3 factor 

```{r}
plot_loadings(efa_f3, "Factor loadings for the 3-factor model")
```
:::

We see that the EFA essentially captures the item pairs of each question. Even constraining the factors to 2 does not confirm that fear and avoidance load on different factors. EFA is thus not very informative, except that the items are not unidimensional. We move on with the 19 items, and with the a/b distinction in mind - ergo that they have high residual correlations - aim to remove at least 1 of each item pair. In regards to the Mokken part, do note that talking to new people and being with strangers have the highest H. 

# Rasch analysis 1

```{r, listmargins-1}
#| column: margin
RIlistItemsMargin(df_ls, fontsize = 12)
```

::: panel-tabset
### Conditional item fit

```{r, conditional item fit.1}
RIitemfit(df_ls, cutoff = "Smith98")
# Getting error for estimating the simfit for LSAS. 
# Getting same error for week 2 and week 1 
#df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time) 
simfit1 <- RIgetfit(df_ls, iterations = 300, cpu = n_cores) 
# RIitemfit(df_ls, simfit1)
RIgetfitPlot(simfit1, df_ls)
```

### CICC

```{r, CICCp1.1}
#skipped right now 
#ICCplot(as.data.frame(df_pre), 
#        itemnumber = 1:4, 
#        method = "cut", 
#        itemdescrip = itemlabels$itemnr[1:4])
### also suggested:
# library(RASCHplot) # install first with `devtools::install_github("ERRTG/RASCHplot")`
# CICCplot(PCM(df),
#          which.item = c(1:4),
#          lower.groups = c(0,7,14,21,28,35),
#          grid.items = TRUE)
```

```{r, CICCp2.1}
#ICCplot(as.data.frame(df_pre), 
#        itemnumber = 5:7, 
#        method = "cut", 
#        itemdescrip = itemlabels$itemnr[5:7])
```

This is severely limited due to the large number of items 

### Item-restscore

```{r, normal restscore.1}
RIrestscore(df_ls)
```

### Item-restscore-bootstrapped

```{r,bootstrap restscore.1}
RIbootRestscore(df_ls, cpu = n_cores, iterations = 500, samplesize = 800) # samplesize=600
```

### PCA

```{r, PCA.1}
#| tbl-cap: "PCA of Rasch model residuals"
RIpcmPCA(df_ls)
```

### Residual correlations

```{r, Residual corr.1}
source('mod_easyRasch_func.R') # Reloading function req. 
# Simulation-based cutoff (99th percentile) for flagging residual correlations.
simcor1 <- RIgetResidCor(df_ls, iterations = 500, cpu = n_cores) # 500
RIresidcorr(df_ls, cutoff = simcor1$p99)
```

### Partial gamma LD 

```{r, partial gamma.1}
RIpartgamLD(df_ls)
```

### 1st contrast loadings

```{r, contrast loadings.1}
RIloadLoc(df_ls)
```

### Response categories

```{r, resp categories.1}
#| fig-height: 21
mirt(df_ls, model = 1, itemtype = 'Rasch', verbose = FALSE) %>% 
  plot(type = "trace", as.table = TRUE, theta_lim = c(-6, 6))
# for fewer items or a more magnified figure, use:
#RIitemCats(df)
```

### Targeting

```{r, targeting again.1}
#| fig-height: 21
# increase fig-height above as needed, if you have many items
RItargeting(df_ls)
```

### Item hierarchy

```{r, Item hierarchy.1}
#| fig-height: 21
RIitemHierarchy(df_ls)
```

### Score groups

```{r, score groups.1}
# Split the sample into low/high score groups for class-level DIF tests.
iarm::score_groups(as.data.frame(df_ls)) %>% 
  as.data.frame(nm = "score_group") %>% 
  dplyr::count(score_group)
dif_plots <- df_ls %>% 
  add_column(dif = iarm::score_groups(.)) %>% 
  split(.$dif) %>% # split the data using the DIF variable
  map(~ RItileplot(.x %>% dplyr::select(!dif)) + labs(title = .x$dif))
dif_plots[[1]] + dif_plots[[2]]
clr_tests(df_ls, model = "PCM")
item_obsexp(PCM(df_ls))
grouping_based_on_score <- score_groups(as.data.frame(df_ls))
partgam_DIF(dat.items = as.data.frame(df_ls), dat.exo = grouping_based_on_score) 
```

### DIF-sex

```{r, dif-sex.1}
# pull() extracts the column as a vector (replaces select + as.vector + [[1]]).
dif.sex <- dif %>% filter(Time == 'PRE') %>% pull(sex) #female = 1, male = 2 
RIdifTable(df_ls, dif.sex)
partgam_DIF(dat.items = as.data.frame(df_ls), dat.exo = dif.sex) 
```

### DIF-age

```{r, dif-age.1}
dif.age <- dif %>% filter(Time == 'PRE') %>% pull(age)
RIdifTable(df_ls, dif.age)
partgam_DIF(dat.items = as.data.frame(df_ls), dat.exo = dif.age) 
```

### DIF-education

```{r, dif-edu.1}
dif.edu <- dif %>% filter(Time == 'PRE') %>% pull(education)
RIdifTable(df_ls, dif.edu)
partgam_DIF(dat.items = as.data.frame(df_ls), dat.exo = dif.edu) 
```

### DIF-Year

```{r, dif-yr.1}
dif.yr <- dif 
%>% filter(Time == 'PRE') %>% select(TreatmentAccessStart) %>% as.vector(.)
dif.yr <- dif.yr[[1]]
RIdifTable(df_ls, dif.yr)
partgam_DIF(dat.items = as.data.frame(df_ls), dat.exo = dif.yr) 
```
:::

## Analysis part 1 decision 

We see here that the biggest problem is the residual correlations coming from the posed situations having a fear component (a) and an avoidance component (b), which are highly correlated. Yet from the Mokken analysis they do seem to load on the same dimension (partly confirmed with the EFA) - and while the PCA and the component plot in the Rasch analyses are more nuanced, they are not decisive. The targeting across a/b is mostly equivalent. We inspect score distributions overall as well 

```{r}
RItileplot(df_ls)
```

- Of 2 a/b **a** had a more even distribution - residual cor = 0.52 - b overfit = 5%

Marginal differences here and small overfit. 2b also had smaller residual correlations to other items (including a items below which will be kept). Keeping 2b 

- Of 8 a/b **a** had a more even distribution - residual cor = 0.66 - b underfit = 82%
- Of 10 a/b **a** had a more even distribution - residual cor = 0.68 - b overfit = 14%
- Of 11 a/b **b** had a more even distribution - residual cor = 0.28 - a overfit = 100%
- Of 12 a/b **b** had a more even distribution - residual cor = 0.54 - a overfit = 100% - b overfit = 99.8%

However the residual correlation between 12b and 11b was high (.44), and the correlation between 11b and 12a was lower (.28); keeping 12a for now. 

- Of 14 a/b **a** had a more even distribution - residual cor = 0.57 - a and b no misfit
- Of 18 a/b **b** had a more even distribution - residual cor = 0.65 - a overfit = 14% 

Thus in the first round we remove 2a, 8b, 10b, 11a, 12b, 14b, 18a. 
# Rasch analysis 2```{r, listmargins-2}#| column: margindf_ls <- df_ls %>%select(!c(LSAS.SR_2a,LSAS.SR_8b,LSAS.SR_10b,LSAS.SR_11a,LSAS.SR_12b,LSAS.SR_14b,LSAS.SR_18a))RIlistItemsMargin(df_ls, fontsize =12)```::: panel-tabset### Conditional item fit```{r, conditional item fit.2}RIitemfit(df_ls, cutoff ="Smith98")# Getting error for estimating the simfit for LSAS. # Getting same error for week 2 and week 1 #df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time) simfit1 <-RIgetfit(df_ls, iterations =300, cpu = n_cores) # RIitemfit(df_ls, simfit1)RIgetfitPlot(simfit1, df_ls)```### CICC```{r, CICCp1.2}#skipped right now #ICCplot(as.data.frame(df_pre), # itemnumber = 1:4, # method = "cut", # itemdescrip = itemlabels$itemnr[1:4])### also suggested:# library(RASCHplot) # install first with `devtools::install_github("ERRTG/RASCHplot")`# CICCplot(PCM(df),# which.item = c(1:4),# lower.groups = c(0,7,14,21,28,35),# grid.items = TRUE)``````{r, CICCp2.2}#ICCplot(as.data.frame(df_pre), # itemnumber = 5:7, # method = "cut", # itemdescrip = itemlabels$itemnr[5:7])```This is severely limited due to the large number of items ### Item-restscore```{r, normal restscore.2}RIrestscore(df_ls)```### Item-restscore-bootstrapped```{r,bootstrap restscore.2}RIbootRestscore(df_ls,cpu=n_cores,iterations =500,samplesize=800) # samplesize=600```### PCA```{r, PCA.2}#| tbl-cap: "PCA of Rasch model residuals"RIpcmPCA(df_ls)```### Residual correlations```{r, Residual corr.2}source('mod_easyRasch_func.R') # Reloading function req. 
simcor1 <-RIgetResidCor(df_ls, iterations =500, cpu = n_cores) # 500RIresidcorr(df_ls, cutoff = simcor1$p99)```### Partial gamma LD ```{r, partial gamma.2}RIpartgamLD(df_ls)```### 1st contrast loadings```{r, contrast loadings.2}RIloadLoc(df_ls)```### Response categories```{r, resp categories.2}#| fig-height: 21mirt(df_ls, model=1, itemtype='Rasch', verbose =FALSE) %>%plot(type="trace", as.table =TRUE, theta_lim =c(-6,6))# for fewer items or a more magnified figure, use:#RIitemCats(df)```### Targeting```{r, targeting again.2}#| fig-height: 21# increase fig-height above as needed, if you have many itemsRItargeting(df_ls)```### Item hierarchy```{r, Item hierarchy.2}#| fig-height: 21RIitemHierarchy(df_ls)```### Score groups```{r, score groups.2}iarm::score_groups(as.data.frame(df_ls)) %>%as.data.frame(nm ="score_group") %>% dplyr::count(score_group)dif_plots <- df_ls %>%add_column(dif = iarm::score_groups(.)) %>%split(.$dif) %>%# split the data using the DIF variablemap(~RItileplot(.x %>% dplyr::select(!dif)) +labs(title = .x$dif))dif_plots[[1]] + dif_plots[[2]]clr_tests(df_ls, model ="PCM")item_obsexp(PCM(df_ls))grouping_based_on_score =score_groups(as.data.frame(df_ls))partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = grouping_based_on_score) ```### DIF-sex```{r, dif-sex.2}dif.sex <- dif %>%filter(Time=='PRE') %>%select(sex) %>%as.vector(.)dif.sex <- dif.sex[[1]] #female = 1, male = 2 RIdifTable(df_ls, dif.sex)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.sex) ```### DIF-age```{r, dif-age.2}dif.age <- dif %>%filter(Time=='PRE') %>%select(age) %>%as.vector(.)dif.age <- dif.age[[1]]RIdifTable(df_ls, dif.age)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.age) ```### DIF-education```{r, dif-edu.2}dif.edu <- dif %>%filter(Time=='PRE') %>%select(education) %>%as.vector(.)dif.edu <- dif.edu[[1]]RIdifTable(df_ls, dif.edu)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.edu) ```### DIF-Year```{r, dif-yr.2}dif.yr <- dif 
%>%filter(Time=='PRE') %>%select(TreatmentAccessStart) %>%as.vector(.)dif.yr <- dif.yr[[1]]RIdifTable(df_ls, dif.yr)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.yr) ```:::## Analysis part 2 decision Most misfit is 11b and 12a both are cardinal symptoms. Both equally overfit (100%). 11b has slightly more residual correlations (12a not included). 11b has more DIF difference in age. 11b slight more overfit. Removing 11b. # Rasch analysis 3```{r, listmargins-3}#| column: margindf_ls <- df_ls %>%select(!c(LSAS.SR_11b))RIlistItemsMargin(df_ls, fontsize =12)```::: panel-tabset### Conditional item fit```{r, conditional item fit.3}RIitemfit(df_ls, cutoff ="Smith98")# Getting error for estimating the simfit for LSAS. # Getting same error for week 2 and week 1 #df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time) simfit1 <-RIgetfit(df_ls, iterations =300, cpu = n_cores) # RIitemfit(df_ls, simfit1)RIgetfitPlot(simfit1, df_ls)```### CICC```{r, CICCp1.3}#skipped right now #ICCplot(as.data.frame(df_pre), # itemnumber = 1:4, # method = "cut", # itemdescrip = itemlabels$itemnr[1:4])### also suggested:# library(RASCHplot) # install first with `devtools::install_github("ERRTG/RASCHplot")`# CICCplot(PCM(df),# which.item = c(1:4),# lower.groups = c(0,7,14,21,28,35),# grid.items = TRUE)``````{r, CICCp2.3}#ICCplot(as.data.frame(df_pre), # itemnumber = 5:7, # method = "cut", # itemdescrip = itemlabels$itemnr[5:7])```This is severely limited due to the large number of items ### Item-restscore```{r, normal restscore.3}RIrestscore(df_ls)```### Item-restscore-bootstrapped```{r,bootstrap restscore.3}RIbootRestscore(df_ls,cpu=n_cores,iterations =500,samplesize=800) # samplesize=600```### PCA```{r, PCA.3}#| tbl-cap: "PCA of Rasch model residuals"RIpcmPCA(df_ls)```### Residual correlations```{r, Residual corr.3}source('mod_easyRasch_func.R') # Reloading function req. 
simcor1 <-RIgetResidCor(df_ls, iterations =500, cpu = n_cores) # 500RIresidcorr(df_ls, cutoff = simcor1$p99)```### Partial gamma LD ```{r, partial gamma.3}RIpartgamLD(df_ls)```### 1st contrast loadings```{r, contrast loadings.3}RIloadLoc(df_ls)```### Response categories```{r, resp categories.3}#| fig-height: 21mirt(df_ls, model=1, itemtype='Rasch', verbose =FALSE) %>%plot(type="trace", as.table =TRUE, theta_lim =c(-6,6))# for fewer items or a more magnified figure, use:#RIitemCats(df)```### Targeting```{r, targeting again.3}#| fig-height: 21# increase fig-height above as needed, if you have many itemsRItargeting(df_ls)```### Item hierarchy```{r, Item hierarchy.3}#| fig-height: 21RIitemHierarchy(df_ls)```### Score groups```{r, score groups.3}iarm::score_groups(as.data.frame(df_ls)) %>%as.data.frame(nm ="score_group") %>% dplyr::count(score_group)dif_plots <- df_ls %>%add_column(dif = iarm::score_groups(.)) %>%split(.$dif) %>%# split the data using the DIF variablemap(~RItileplot(.x %>% dplyr::select(!dif)) +labs(title = .x$dif))dif_plots[[1]] + dif_plots[[2]]clr_tests(df_ls, model ="PCM")item_obsexp(PCM(df_ls))grouping_based_on_score =score_groups(as.data.frame(df_ls))partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = grouping_based_on_score) ```### DIF-sex```{r, dif-sex.3}dif.sex <- dif %>%filter(Time=='PRE') %>%select(sex) %>%as.vector(.)dif.sex <- dif.sex[[1]] #female = 1, male = 2 RIdifTable(df_ls, dif.sex)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.sex) ```### DIF-age```{r, dif-age.3}dif.age <- dif %>%filter(Time=='PRE') %>%select(age) %>%as.vector(.)dif.age <- dif.age[[1]]RIdifTable(df_ls, dif.age)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.age) ```### DIF-education```{r, dif-edu.3}dif.edu <- dif %>%filter(Time=='PRE') %>%select(education) %>%as.vector(.)dif.edu <- dif.edu[[1]]RIdifTable(df_ls, dif.edu)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.edu) ```### DIF-Year```{r, dif-yr.3}dif.yr <- dif 
%>%filter(Time=='PRE') %>%select(TreatmentAccessStart) %>%as.vector(.)dif.yr <- dif.yr[[1]]RIdifTable(df_ls, dif.yr)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.yr) ```:::## Analysis part 3 decision Trying remove 12a # Rasch analysis 4Removing 12a```{r, listmargins-4}#| column: margindf_ls <- df_ls %>%select(!c(LSAS.SR_12a))RIlistItemsMargin(df_ls, fontsize =12)```::: panel-tabset### Conditional item fit```{r, conditional item fit.4}RIitemfit(df_ls, cutoff ="Smith98")# Getting error for estimating the simfit for LSAS. # Getting same error for week 2 and week 1 #df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time) simfit1 <-RIgetfit(df_ls, iterations =300, cpu = n_cores) # RIitemfit(df_ls, simfit1)RIgetfitPlot(simfit1, df_ls)```### CICC```{r, CICCp1.4}#skipped right now #ICCplot(as.data.frame(df_pre), # itemnumber = 1:4, # method = "cut", # itemdescrip = itemlabels$itemnr[1:4])### also suggested:# library(RASCHplot) # install first with `devtools::install_github("ERRTG/RASCHplot")`# CICCplot(PCM(df),# which.item = c(1:4),# lower.groups = c(0,7,14,21,28,35),# grid.items = TRUE)``````{r, CICCp2.4}#ICCplot(as.data.frame(df_pre), # itemnumber = 5:7, # method = "cut", # itemdescrip = itemlabels$itemnr[5:7])```This is severely limited due to the large number of items ### Item-restscore```{r, normal restscore.4}RIrestscore(df_ls)```### Item-restscore-bootstrapped```{r,bootstrap restscore.4}RIbootRestscore(df_ls,cpu=n_cores,iterations =500,samplesize=800) # samplesize=600```### PCA```{r, PCA.4}#| tbl-cap: "PCA of Rasch model residuals"RIpcmPCA(df_ls)```### Residual correlations```{r, Residual corr.4}source('mod_easyRasch_func.R') # Reloading function req. 
simcor1 <-RIgetResidCor(df_ls, iterations =500, cpu = n_cores) # 500RIresidcorr(df_ls, cutoff = simcor1$p99)```### Partial gamma LD ```{r, partial gamma.4}RIpartgamLD(df_ls)```### 1st contrast loadings```{r, contrast loadings.4}RIloadLoc(df_ls)```### Response categories```{r, resp categories.4}#| fig-height: 21mirt(df_ls, model=1, itemtype='Rasch', verbose =FALSE) %>%plot(type="trace", as.table =TRUE, theta_lim =c(-6,6))# for fewer items or a more magnified figure, use:#RIitemCats(df)```### Targeting```{r, targeting again.4}#| fig-height: 21# increase fig-height above as needed, if you have many itemsRItargeting(df_ls)```### Item hierarchy```{r, Item hierarchy.4}#| fig-height: 21RIitemHierarchy(df_ls)```### Score groups```{r, score groups.4}iarm::score_groups(as.data.frame(df_ls)) %>%as.data.frame(nm ="score_group") %>% dplyr::count(score_group)dif_plots <- df_ls %>%add_column(dif = iarm::score_groups(.)) %>%split(.$dif) %>%# split the data using the DIF variablemap(~RItileplot(.x %>% dplyr::select(!dif)) +labs(title = .x$dif))dif_plots[[1]] + dif_plots[[2]]clr_tests(df_ls, model ="PCM")item_obsexp(PCM(df_ls))grouping_based_on_score =score_groups(as.data.frame(df_ls))partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = grouping_based_on_score) ```### DIF-sex```{r, dif-sex.4}dif.sex <- dif %>%filter(Time=='PRE') %>%select(sex) %>%as.vector(.)dif.sex <- dif.sex[[1]] #female = 1, male = 2 RIdifTable(df_ls, dif.sex)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.sex) ```### DIF-age```{r, dif-age.4}dif.age <- dif %>%filter(Time=='PRE') %>%select(age) %>%as.vector(.)dif.age <- dif.age[[1]]RIdifTable(df_ls, dif.age)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.age) ```### DIF-education```{r, dif-edu.4}dif.edu <- dif %>%filter(Time=='PRE') %>%select(education) %>%as.vector(.)dif.edu <- dif.edu[[1]]RIdifTable(df_ls, dif.edu)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.edu) ```### DIF-Year```{r, dif-yr.4}dif.yr <- dif 
%>%filter(Time=='PRE') %>%select(TreatmentAccessStart) %>%as.vector(.)dif.yr <- dif.yr[[1]]RIdifTable(df_ls, dif.yr)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.yr) ```:::## Analysis part 4 decision Item 16a does seem to have problems across several dimensions except the boostrapped itemfit. Residual correlations, partial gamma, and DIF on age. We try to remove it . # Rasch analysis 5Removing 16a```{r, listmargins-5}#| column: margindf_ls <- df_ls %>%select(!c(LSAS.SR_16a))RIlistItemsMargin(df_ls, fontsize =12)```::: panel-tabset### Conditional item fit```{r, conditional item fit.5}RIitemfit(df_ls, cutoff ="Smith98")# Getting error for estimating the simfit for LSAS. # Getting same error for week 2 and week 1 #df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time) simfit1 <-RIgetfit(df_ls, iterations =300, cpu = n_cores) # RIitemfit(df_ls, simfit1)RIgetfitPlot(simfit1, df_ls)```### CICC```{r, CICCp1.5}#skipped right now #ICCplot(as.data.frame(df_pre), # itemnumber = 1:4, # method = "cut", # itemdescrip = itemlabels$itemnr[1:4])### also suggested:# library(RASCHplot) # install first with `devtools::install_github("ERRTG/RASCHplot")`# CICCplot(PCM(df),# which.item = c(1:4),# lower.groups = c(0,7,14,21,28,35),# grid.items = TRUE)``````{r, CICCp2.5}#ICCplot(as.data.frame(df_pre), # itemnumber = 5:7, # method = "cut", # itemdescrip = itemlabels$itemnr[5:7])```This is severely limited due to the large number of items ### Item-restscore```{r, normal restscore.5}RIrestscore(df_ls)```### Item-restscore-bootstrapped```{r,bootstrap restscore.5}RIbootRestscore(df_ls,cpu=n_cores,iterations =500,samplesize=800) # samplesize=600```### PCA```{r, PCA.5}#| tbl-cap: "PCA of Rasch model residuals"RIpcmPCA(df_ls)```### Residual correlations```{r, Residual corr.5}source('mod_easyRasch_func.R') # Reloading function req. 
simcor1 <-RIgetResidCor(df_ls, iterations =500, cpu = n_cores) # 500RIresidcorr(df_ls, cutoff = simcor1$p99)```### Partial gamma LD ```{r, partial gamma.5}RIpartgamLD(df_ls)```### 1st contrast loadings```{r, contrast loadings.5}RIloadLoc(df_ls)```### Response categories```{r, resp categories.5}#| fig-height: 7mirt(df_ls, model=1, itemtype='Rasch', verbose =FALSE) %>%plot(type="trace", as.table =TRUE, theta_lim =c(-6,6))# for fewer items or a more magnified figure, use:#RIitemCats(df)```### Targeting```{r, targeting again.5}#| fig-height: 7# increase fig-height above as needed, if you have many itemsRItargeting(df_ls)```### Item hierarchy```{r, Item hierarchy.5}#| fig-height: 7RIitemHierarchy(df_ls)```### Score groups```{r, score groups.5}iarm::score_groups(as.data.frame(df_ls)) %>%as.data.frame(nm ="score_group") %>% dplyr::count(score_group)dif_plots <- df_ls %>%add_column(dif = iarm::score_groups(.)) %>%split(.$dif) %>%# split the data using the DIF variablemap(~RItileplot(.x %>% dplyr::select(!dif)) +labs(title = .x$dif))dif_plots[[1]] + dif_plots[[2]]clr_tests(df_ls, model ="PCM")item_obsexp(PCM(df_ls))grouping_based_on_score =score_groups(as.data.frame(df_ls))partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = grouping_based_on_score) ```### DIF-sex```{r, dif-sex.5}dif.sex <- dif %>%filter(Time=='PRE') %>%select(sex) %>%as.vector(.)dif.sex <- dif.sex[[1]] #female = 1, male = 2 RIdifTable(df_ls, dif.sex)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.sex) ```### DIF-age```{r, dif-age.5}dif.age <- dif %>%filter(Time=='PRE') %>%select(age) %>%as.vector(.)dif.age <- dif.age[[1]]RIdifTable(df_ls, dif.age)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.age) ```### DIF-education```{r, dif-edu.5}dif.edu <- dif %>%filter(Time=='PRE') %>%select(education) %>%as.vector(.)dif.edu <- dif.edu[[1]]RIdifTable(df_ls, dif.edu)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.edu) ```### DIF-Year```{r, dif-yr.5}dif.yr <- dif %>%filter(Time=='PRE') 
%>%select(TreatmentAccessStart) %>%as.vector(.)dif.yr <- dif.yr[[1]]RIdifTable(df_ls, dif.yr)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.yr) ```:::## Analysis part 5 decision 24a "Resiting sales" do show some misfit, residual correlations and overfitting in the boostrap. Its not much, however the item does have better targetting than 22a. Removing 22a. # Rasch analysis 6Removing 22a```{r, listmargins-6}#| column: margindf_ls <- df_ls %>%select(!c(LSAS.SR_22a))RIlistItemsMargin(df_ls, fontsize =12)```::: panel-tabset### Conditional item fit```{r, conditional item fit.6}RIitemfit(df_ls, cutoff ="Smith98")# Getting error for estimating the simfit for LSAS. # Getting same error for week 2 and week 1 #df_week02 <- df %>% filter(Time=='WEEK02') %>% select(!Time) simfit1 <-RIgetfit(df_ls, iterations =300, cpu = n_cores) # RIitemfit(df_ls, simfit1)RIgetfitPlot(simfit1, df_ls)```### CICC```{r, CICCp1.6}#skipped right now ICCplot(as.data.frame(df_pre), itemnumber =1:4, method ="cut", itemdescrip = itemlabels$itemnr[c(4,10,15,19)])### also suggested:# library(RASCHplot) # install first with `devtools::install_github("ERRTG/RASCHplot")`# CICCplot(PCM(df),# which.item = c(1:4),# lower.groups = c(0,7,14,21,28,35),# grid.items = TRUE)``````{r, CICCp2.6}ICCplot(as.data.frame(df_pre), itemnumber =5:8, method ="cut", itemdescrip = itemlabels$itemnr[c(27,36,37,43)])```### Item-restscore```{r, normal restscore.6}RIrestscore(df_ls)```### Item-restscore-bootstrapped```{r,bootstrap restscore.6}RIbootRestscore(df_ls,cpu=n_cores,iterations =500,samplesize=800) # samplesize=600```### PCA```{r, PCA.6}#| tbl-cap: "PCA of Rasch model residuals"RIpcmPCA(df_ls)```### Residual correlations```{r, Residual corr.6}source('mod_easyRasch_func.R') # Reloading function req. 
simcor1 <-RIgetResidCor(df_ls, iterations =500, cpu = n_cores) # 500RIresidcorr(df_ls, cutoff = simcor1$p99)```### Partial gamma LD ```{r, partial gamma.6}RIpartgamLD(df_ls)```### 1st contrast loadings```{r, contrast loadings.6}RIloadLoc(df_ls)```### Response categories```{r, resp categories.6}#| fig-height: 7mirt(df_ls, model=1, itemtype='Rasch', verbose =FALSE) %>%plot(type="trace", as.table =TRUE, theta_lim =c(-6,6))# for fewer items or a more magnified figure, use:#RIitemCats(df)```### Targeting```{r, targeting again.6}#| fig-height: 7# increase fig-height above as needed, if you have many itemsRItargeting(df_ls)```### Item hierarchy```{r, Item hierarchy.6}#| fig-height: 7RIitemHierarchy(df_ls)```### Score groups```{r, score groups.6}iarm::score_groups(as.data.frame(df_ls)) %>%as.data.frame(nm ="score_group") %>% dplyr::count(score_group)dif_plots <- df_ls %>%add_column(dif = iarm::score_groups(.)) %>%split(.$dif) %>%# split the data using the DIF variablemap(~RItileplot(.x %>% dplyr::select(!dif)) +labs(title = .x$dif))dif_plots[[1]] + dif_plots[[2]]clr_tests(df_ls, model ="PCM")item_obsexp(PCM(df_ls))grouping_based_on_score =score_groups(as.data.frame(df_ls))partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = grouping_based_on_score) ```### DIF-sex```{r, dif-sex.6}dif.sex <- dif %>%filter(Time=='PRE') %>%select(sex) %>%as.vector(.)dif.sex <- dif.sex[[1]] #female = 1, male = 2 RIdifTable(df_ls, dif.sex)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.sex) ```### DIF-age```{r, dif-age.6}dif.age <- dif %>%filter(Time=='PRE') %>%select(age) %>%as.vector(.)dif.age <- dif.age[[1]]RIdifTable(df_ls, dif.age)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.age) ```### DIF-education```{r, dif-edu.6}dif.edu <- dif %>%filter(Time=='PRE') %>%select(education) %>%as.vector(.)dif.edu <- dif.edu[[1]]RIdifTable(df_ls, dif.edu)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.edu) ```### DIF-Year```{r, dif-yr.6}dif.yr <- dif %>%filter(Time=='PRE') 
%>%select(TreatmentAccessStart) %>%as.vector(.)dif.yr <- dif.yr[[1]]RIdifTable(df_ls, dif.yr)partgam_DIF(dat.items =as.data.frame(df_ls),dat.exo = dif.yr) ```:::## Analysis part 6 decision Testing to move on with this scale. # Analysis conclusion Resulting LSAS-SR reworked scale thus has item as seen in margin below## Resulting items ```{r,subset_chosen_item fin}what_scale <-gsub('\\.','',items_to_use)``````{r, listmargins-subbed fin}#| column: marginRIlistItemsMargin(df_ls, fontsize =12)```## Reliability ```{r, rel}RItif(df_ls)``````{r, rel 2}RItif(df_ls,samplePSI=TRUE)```## Person parameter ```{r, personfit}RIpfit(df_ls)```## Misfit check ```{r}pfit_u3poly <- PerFit::U3poly(matrix = df_ls, Ncat =4, # make sure to input number of response categories, not thresholdsIRT.PModel ="PCM")misfits <- PerFit::flagged.resp(pfit_u3poly) %>%pluck("Scores") %>%as.data.frame() %>%pull(FlaggedID)misfits2 <-RIpfit(df_ls, output ="rowid")```::: panel-tabset### All resp ```{r}RIitemfit(df_ls, simcut = simfit1)```### U3 misfit removed```{r}RIitemfit(df_ls[-misfits,], simcut = simfit1)RItif(df_ls[-misfits,])```### ZSTD removed ```{r}RIitemfit(df_ls[-misfits2,], simcut = simfit1)RItif(df_ls[-misfits2,])```:::## Item parameters```{r, item params}RIitemparams(df_ls)item_param_matrix <-RIitemparams(df_ls,output='dataframe') %>%as.matrix(.)write.csv(item_param_matrix,file=paste0("./results/item_params_",what_scale,'.csv'),row.names=TRUE)```## Ordinal sum to interval score ```{r, RI_score}RIscoreSE(df_ls)RIscoreSE(df_ls,output='figure')sum_to_latent <-RIscoreSE(df_ls,output ='dataframe')write.csv(sum_to_latent,file=paste0('./results/ordinal_sum_to_latent_',what_scale,'.csv'),row.names=FALSE)```## ThetasThe ordinal sum to interval score contains the information to transform into the below thetas, but we plot them here for convinience```{r, thetas}est_thetas2 <-RIestThetas(df_ls, method ="WLE")hist(est_thetas2$WLE, col ="#009ca6", main ="Histogram of person locations (thetas)", breaks 
= 25)
```

# Software used

```{r}
# Collect citation info for all packages used in this document.
pkgs <- grateful::cite_packages(cite.tidyverse = TRUE, 
                                output = "table",
                                bib.file = "grateful-refs.bib",
                                include.RStudio = FALSE,
                                out.dir = getwd())
formattable(pkgs, table.attr = 'class=\"table table-striped\" style="font-size: 15px; font-family: Lato; width: 80%"')
```

# References